## Install the R packages
#BiocManager::install("scRepertoire")
#install.packages('Seurat')
#BiocManager::install("scater")
#BiocManager::install("scDblFinder")
#BiocManager::install("celldex")
#BiocManager::install("SingleR")
#BiocManager::install("clusterProfiler")
#BiocManager::install("org.Hs.eg.db")
#devtools::install_github('immunogenomics/presto')
#install.packages("hdf5r")
#remotes::install_github("10xGenomics/loupeR")
#loupeR::setup()
## Loading libraries
suppressPackageStartupMessages({
library(Seurat)
library(scDblFinder)
library(scater)
library(sctransform)
library(scRepertoire)
library(SingleCellExperiment)
library(celldex)
library(SingleR)
library(DropletUtils)
library(dplyr)
library(ggplot2)
library(data.table)
library(tidyverse)
library(loupeR)
library(cowplot)
library(clusterProfiler)
library(biomaRt)
loupeR::setup()
})
ATAC <- Read10X_h5("~/ATAC/pbmc_unsorted_3k_filtered_feature_bc_matrix.h5")
## Genome matrix has multiple modalities, returning a list of matrices for this genome
ATAC_object <- CreateSeuratObject(ATAC)
ATAC_object
## An object of class Seurat
## 117757 features across 3009 samples within 1 assay
## Active assay: RNA (117757 features, 0 variable features)
## 2 layers present: counts.Gene Expression, counts.Peaks
ATAC_object <- JoinLayers(ATAC_object)
atac_seq <- as.SingleCellExperiment(ATAC_object)
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Layer 'data' is empty
## Warning: Layer 'scale.data' is empty
atac_seq
## class: SingleCellExperiment
## dim: 117757 3009
## metadata(0):
## assays(1): counts
## rownames(117757): MIR1302-2HG FAM138A ... KI270713.1:26202-26986
## KI270713.1:36937-37838
## rowData names(0):
## colnames(3009): AAACAGCCAACAGGTG-1 AAACATGCAACAACAA-1 ...
## TTTGTGTTCACTTCAT-1 TTTGTGTTCATGCGTG-1
## colData names(4): orig.ident nCount_RNA nFeature_RNA ident
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## reducedDimNames(0):
## mainExpName: RNA
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## altExpNames(0):
## We need to set.seed() because the scDblFinder command
## uses a probabilist strategy to identify doublets. This
## means that, everytime we run the command, it will produce
## results that are slightly different. The set.seed()
## command will guarantee the same results everytime.
library(dplyr)
set.seed(123)
results <- scDblFinder(atac_seq, returnType = 'table') %>%
as.data.frame() %>%
dplyr::filter(type == 'real')
## Warning in .checkSCE(sce, coerce = is.null(samples)): Some cells in `sce` have
## an extremely low read counts; note that these could trigger errors and might
## best be filtered out
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Creating ~2408 artificial doublets...
## Dimensional reduction
## Evaluating kNN...
## Training model...
## iter=0, 428 cells excluded from training.
## iter=1, 435 cells excluded from training.
## iter=2, 441 cells excluded from training.
## Threshold found:0.408
## 149 (5%) doublets called
head(results)
## type weighted distanceToNearest distanceToNearestDoublet
## AAACAGCCAACAGGTG-1 real 0.23937491 5.557017 5.747415
## AAACATGCAACAACAA-1 real 0.42415741 4.361270 5.495526
## AAACATGCACCTGGTG-1 real 0.00000000 3.174207 32.095882
## AAACCAACACAGCCTG-1 real 0.08263374 5.557017 5.926677
## AAACCAACAGCAAGAT-1 real 0.20575437 6.939530 6.939530
## AAACCAACATTGCGAC-1 real 0.09430503 4.766809 5.374438
## distanceToNearestReal nearestClass ratio.k3 ratio.k10
## AAACAGCCAACAGGTG-1 5.557017 cell 0.3333333 0.2
## AAACATGCAACAACAA-1 4.361270 cell 0.0000000 0.4
## AAACATGCACCTGGTG-1 3.174207 cell 0.0000000 0.0
## AAACCAACACAGCCTG-1 5.557017 cell 0.3333333 0.1
## AAACCAACAGCAAGAT-1 6.996020 artificialDoublet 0.3333333 0.2
## AAACCAACATTGCGAC-1 4.766809 cell 0.0000000 0.2
## ratio.k15 ratio.k20 ratio.k25 ratio.k39 lsizes nfeatures
## AAACAGCCAACAGGTG-1 0.13333333 0.15 0.12 0.20512821 1123 437
## AAACATGCAACAACAA-1 0.46666667 0.55 0.56 0.46153846 2420 768
## AAACATGCACCTGGTG-1 0.00000000 0.00 0.00 0.00000000 421 65
## AAACCAACACAGCCTG-1 0.06666667 0.10 0.08 0.10256410 968 366
## AAACCAACAGCAAGAT-1 0.20000000 0.20 0.20 0.23076923 946 426
## AAACCAACATTGCGAC-1 0.13333333 0.10 0.08 0.07692308 1068 408
## nAbove2 src cxds_score include.in.training score
## AAACAGCCAACAGGTG-1 64 real 1.600112e-02 TRUE 0.0002391383
## AAACATGCAACAACAA-1 269 real 2.388606e-02 TRUE 0.0417344086
## AAACATGCACCTGGTG-1 13 real 1.714966e-10 TRUE 0.0001717108
## AAACCAACACAGCCTG-1 68 real 1.476810e-02 TRUE 0.0004766944
## AAACCAACAGCAAGAT-1 76 real 1.858442e-02 TRUE 0.0004033735
## AAACCAACATTGCGAC-1 77 real 1.208356e-02 TRUE 0.0010119085
## class
## AAACAGCCAACAGGTG-1 singlet
## AAACATGCAACAACAA-1 singlet
## AAACATGCACCTGGTG-1 singlet
## AAACCAACACAGCCTG-1 singlet
## AAACCAACAGCAAGAT-1 singlet
## AAACCAACATTGCGAC-1 singlet
results %>%
dplyr::count(class)
## class n
## 1 doublet 149
## 2 singlet 2860
outfile = file.path('project3_doubletFile.txt')
write.table(results, outfile, sep='\t', quote=F,
col.names=TRUE, row.names=TRUE)
keep = results %>%
dplyr::filter(class == "singlet") %>%
rownames()
ATAC_object = ATAC_object[, keep]
ATAC_object
## An object of class Seurat
## 117757 features across 2860 samples within 1 assay
## Active assay: RNA (117757 features, 0 variable features)
## 1 layer present: counts
## %MT reads
ATAC_object[["percentage_mtDNA"]] <- PercentageFeatureSet(ATAC_object, pattern="^MT-")
VlnPlot(ATAC_object, features = c("nFeature_RNA", "nCount_RNA", "percentage_mtDNA"))
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
nFeature_RNA_cutoff <- 300 # Example cutoff, adjust as needed
nCount_RNA_cutoff <- c(500, 15000) # Example cutoff, adjust as needed
percentage_mtDNA_cutoff <- 20 # Example cutoff, adjust as needed
# Create violin plots with horizontal lines
p1 <- VlnPlot(ATAC_object, features = "nFeature_RNA") +
geom_hline(yintercept = nFeature_RNA_cutoff, color = "red", linetype = "dashed") +
ggtitle("nFeature_RNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
p2 <- VlnPlot(ATAC_object, features = "nCount_RNA") +
geom_hline(yintercept = nCount_RNA_cutoff, color = "red", linetype = "dashed") +
ggtitle("nCount_RNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
p3 <- VlnPlot(ATAC_object, features = "percentage_mtDNA") +
geom_hline(yintercept = percentage_mtDNA_cutoff, color = "red", linetype = "dashed") +
ggtitle("percentage_mtDNA") + NoLegend()
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
# Arrange plots in a grid
# p1 + p2 + p3
plot_grid(p1, p2, p3, ncol = 3)
## Scatter plots
FeatureScatter(ATAC_object, feature1="nCount_RNA",
feature2="percentage_mtDNA", pt.size = 1) +
geom_hline(yintercept = percentage_mtDNA_cutoff, color = "black", linetype = "dashed")
FeatureScatter(ATAC_object, feature1="nCount_RNA",
feature2="nFeature_RNA", pt.size = 1)
ATAC_object.qc <- FetchData(ATAC_object,
vars=c("nFeature_RNA","nCount_RNA","percentage_mtDNA"))
scPlot <- ATAC_object.qc %>%
mutate(keep = if_else(nFeature_RNA > 300 &
nCount_RNA < 15000 &
percentage_mtDNA < 20, "keep", "remove")) %>%
ggplot() +
geom_point(aes(nCount_RNA, nFeature_RNA, colour=keep), alpha=.8, size = 1) +
theme_classic()
scPlot
scPlot <- ATAC_object.qc %>%
mutate(keep = if_else(nFeature_RNA > 300 &
nCount_RNA < 15000 &
percentage_mtDNA < 20, "keep", "remove")) %>%
ggplot() +
geom_point(aes(nCount_RNA, nFeature_RNA, colour=keep), alpha=.8, size = 1) +
scale_x_log10() +
scale_y_log10() +
theme_classic()
scPlot
# Filtering out poor quality cells and reads
ATAC_object <- subset(ATAC_object, subset = percentage_mtDNA < 20 & nFeature_RNA > 300 & nCount_RNA < 15000)
# After filtering
VlnPlot(ATAC_object, features = c("nCount_RNA", "nFeature_RNA", "percentage_mtDNA"),
pt.size = 0.1, log = TRUE)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
ATAC_object
## An object of class Seurat
## 117757 features across 2613 samples within 1 assay
## Active assay: RNA (117757 features, 0 variable features)
## 1 layer present: counts
ATAC_object <- FindVariableFeatures(ATAC_object, selection.method = "vst",
nfeatures = 2000)
## Finding variable features for layer counts
top10 <- VariableFeatures(ATAC_object, simplify = FALSE)
#top10$counts
head(top10$counts, 10)
## [1] "IGLC2" "IGKC" "GNLY" "TCF4" "IGLC1" "SOX5"
## [7] "LINC01478" "IGHM" "FCGR3A" "LINC01374"
#top10$counts
plot1 <- VariableFeaturePlot(ATAC_object, raster = FALSE)
LabelPoints(plot=plot1, points = head(top10$counts, 10), repel = TRUE) + theme(legend.position="none")
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
ggsave("~/ATAC/variable_genes.png", dpi = 800)
## Saving 7 x 5 in image
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
##Normalize
#ATAC_object <- SCTransform(ATAC_object)
ATAC_object <- NormalizeData(ATAC_object)
## Normalizing layer: counts
ATAC_object <- ScaleData(ATAC_object)
## Centering and scaling data matrix
ATAC_object = FindVariableFeatures(ATAC_object, selection.method="vst",
nfeatures=2000)
## Finding variable features for layer counts
top10 <- VariableFeatures(ATAC_object, simplify = F)
head(top10$counts, 10)
## [1] "IGLC2" "IGKC" "GNLY" "TCF4" "IGLC1" "SOX5"
## [7] "LINC01478" "IGHM" "FCGR3A" "LINC01374"
plot2 <- VariableFeaturePlot(ATAC_object, raster = FALSE)
LabelPoints(plot=plot2, points = head(top10$counts, 10), repel=T,
xnudge=-1, ynudge=0) + theme(legend.position="none")
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
##PCA & UMAP
ATAC_object <- RunPCA(ATAC_object, features = VariableFeatures(ATAC_object))
## PC_ 1
## Positive: PLXDC2, SLC8A1, LRMDA, FCN1, JAK2, TYMP, IRAK3, NAMPT, TBXAS1, RBM47
## VCAN, DMXL2, SAT1, MCTP1, ZEB2, CYBB, TNFAIP2, LRRK2, CLEC7A, LYN
## CD36, CSF3R, LYZ, LYST, TLR2, GAB2, CTSS, HCK, STX11, AC020916.1
## Negative: BCL11B, CD247, CAMK4, BACH2, BCL2, TC2N, LEF1, IL32, INPP4B, SYNE2
## IL7R, THEMIS, CD96, TRAC, LTB, ANK3, RORA, TXK, GRAP2, TRBC2
## NR3C2, CCR7, APBA2, CD69, TRBC1, MLLT3, ITGA6, LINC01934, NELL2, RASGRF2
## PC_ 2
## Positive: BCL11B, CD247, IL32, CAMK4, TC2N, INPP4B, THEMIS, IL7R, LEF1, TRAC
## PDE3B, TXK, RORA, AOAH, DPYD, FNDC3B, ARHGAP26, APBA2, LINC01934, SAMD3
## GRAP2, CTSW, TRBC2, TRBC1, SYNE2, NCALD, ANXA1, RASGRF2, MLLT3, SRGN
## Negative: BANK1, MS4A1, PAX5, AFF3, FCRL1, IGHM, EBF1, NIBAN3, OSBPL10, LINC00926
## RALGPS2, CD79A, BLK, ADAM28, AP002075.1, CD22, COL19A1, IGHD, IGKC, BCL11A
## BLNK, KHDRBS2, PLEKHG1, COBLL1, WDFY4, GNG7, CD79B, AC120193.1, HLA-DQA1, CD74
## PC_ 3
## Positive: GNLY, NKG7, PRF1, KLRD1, GZMA, GZMB, CCL5, CST7, SPON2, FGFBP2
## GZMH, ADGRG1, PDGFD, TGFBR3, KLRF1, CCL4, PPP2R2B, C1orf21, CTSW, BNC2
## PTGDR, A2M, PYHIN1, CX3CR1, IL2RB, CLIC3, FCGR3A, NCR1, CD160, SAMD3
## Negative: LEF1, BACH2, CAMK4, PDE3B, CCR7, IL7R, FHIT, INPP4B, NELL2, ANK3
## NR3C2, RASGRF2, PLCL1, LTB, MAL, PRKN, CSGALNACT1, ITGA6, AC139720.1, BCL2
## BCL11B, MLLT3, THEMIS, SLC2A3, TSHZ2, PATJ, AL589693.1, SLC16A10, PCAT1, CA6
## PC_ 4
## Positive: PAX5, MS4A1, FCRL1, GNLY, MCTP2, NKG7, LINC00926, EBF1, COL19A1, OSBPL10
## PRF1, IGHD, CD79A, MTSS1, BANK1, KLRD1, IGHM, CCL5, CD79B, GZMA
## PLEKHG1, FCER2, CST7, CD22, LARGE1, FGFBP2, ADAM28, AC120193.1, AP002075.1, GZMH
## Negative: EPHB1, CLEC4C, CUX2, FAM160A1, COL24A1, AC023590.1, NRP1, LINC00996, LINC01478, LILRA4
## LINC01374, COL26A1, PTPRS, PLXNA4, PACSIN1, SCN9A, ITM2C, TNFRSF21, SLC35F3, P3H2
## RHEX, P2RY6, SLC7A11, AC007381.1, PLD4, ZFAT, PHEX, SCN1A-AS1, CCDC50, KCNK10
## PC_ 5
## Positive: PLCB1, VCAN, VCAN-AS1, ARHGAP24, DYSF, GLT1D1, AC020916.1, PADI4, FNDC3B, CSF3R
## CREB5, LINC00937, ARHGAP26, ACSL1, MEGF9, TREM1, IRS2, SLC2A3, JUN, MCTP2
## CLMN, CRISPLD2, MIR646HG, PDE4D, LIN7A, RAB11FIP1, GNLY, ANXA1, CR1, NLRP12
## Negative: CDKN1C, IFITM3, FCGR3A, CALHM6, CSF1R, AIF1, SMIM25, CST3, HLA-DPA1, LST1
## SERPINA1, MS4A7, TCF7L2, CFD, AC104809.2, CCDC26, HMOX1, CTSL, MTSS1, FGL2
## IFI30, LRRC25, PAPSS2, COTL1, SIGLEC10, FMNL2, AC020651.2, CD68, LILRB1, HLA-DPB1
scPlot <- DimPlot(ATAC_object, reduction="pca", pt.size = 1)
scPlot
scPlot <- ElbowPlot(ATAC_object)
scPlot
scPlot_elbow <- ElbowPlot(ATAC_object, ndims=50)
scPlot_elbow
pca = ATAC_object[["pca"]]
## get the eigenvalues
evs = pca@stdev^2
total.var = pca@misc$total.variance
varExplained = evs/total.var
pca.data = data.frame(PC=factor(1:length(evs)),
percVar=varExplained*100)
pca.data$cumulVar = cumsum(pca.data$percVar)
head(pca.data, 20)
## PC percVar cumulVar
## 1 1 8.5987410 8.598741
## 2 2 3.2664226 11.865164
## 3 3 1.6953758 13.560539
## 4 4 1.5111487 15.071688
## 5 5 1.2189008 16.290589
## 6 6 0.5515784 16.842167
## 7 7 0.4920644 17.334232
## 8 8 0.4251927 17.759424
## 9 9 0.3846802 18.144105
## 10 10 0.3585930 18.502698
## 11 11 0.3043225 18.807020
## 12 12 0.2757073 19.082727
## 13 13 0.2632581 19.345985
## 14 14 0.2447057 19.590691
## 15 15 0.2379580 19.828649
## 16 16 0.2250354 20.053685
## 17 17 0.2206213 20.274306
## 18 18 0.2136148 20.487921
## 19 19 0.2006676 20.688588
## 20 20 0.1907987 20.879387
scPlot_bar <- pca.data[1:15,] %>%
ggplot(aes(x=PC, y=percVar)) +
geom_bar(stat='identity') +
geom_hline(yintercept = 1, colour="red", linetype=3) +
labs(title="Variance Explanation by PCA") +
xlab("Principal Components") +
ylab("Percentage of Explained Variance") +
theme_bw()
scPlot_bar
#gridExtra::grid.arrange(scPlot_elbow, scPlot_bar, nrow = 2)
scPlot <- pca.data[1:15,] %>%
ggplot(aes(x=PC, y=cumulVar)) +
geom_bar(stat='identity') +
geom_hline(yintercept = 80, colour="red", linetype=3) +
labs(title="Cumulative Variance Explanation by PCA") +
xlab("Principal Components") +
ylab("Cumulative Percentage of Explained Variance") +
theme_bw()
scPlot
# Heatmap of pc1 and pc2
DimHeatmap(ATAC_object, dims = 1:2, cells = 500)
DimHeatmap(ATAC_object, dims = 1:15, balanced = T)
ATAC_object <- RunTSNE(ATAC_object, dims = 1:15)
DimPlot(ATAC_object, label=T, reduction = "tsne") + NoLegend()
# Clustering according to percentage mito
FeaturePlot(ATAC_object, features=c("percentage_mtDNA"), reduction = "tsne")
#UMAP
ATAC_object <- RunUMAP(ATAC_object, dims=1:15, verbose=F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(ATAC_object, label=T, reduction = "umap") + NoLegend()
# Mito check
FeaturePlot(ATAC_object, features=c("percentage_mtDNA"), reduction = "umap")
##Cluster Analysis
ATAC_object <- FindNeighbors(ATAC_object, dims=1:15)
## Computing nearest neighbor graph
## Computing SNN
ATAC_object <- FindClusters(ATAC_object, resolution = 0.125)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2613
## Number of edges: 99016
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9629
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(ATAC_object, reduction = "tsne")
# UMAP clusters
DimPlot(ATAC_object, reduction="umap")
##Find Markers
ATAC_object <- JoinLayers(ATAC_object)
ATAC_object.markers = FindAllMarkers(ATAC_object, only.pos=TRUE,
min.pct=0.25, logfc.threshold=0.25)
## Calculating cluster 0
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
#View(ATAC_object.markers)
topMarkers <- ATAC_object.markers %>%
group_by(cluster) %>%
top_n(n=5, wt=avg_log2FC)
DoHeatmap(ATAC_object, features = topMarkers$gene) + NoLegend()
## Warning in DoHeatmap(ATAC_object, features = topMarkers$gene): The following
## features were omitted as they were not found in the scale.data slot for the RNA
## assay: AC104370.1, FCRL6, CD28
##Annotation
ref = celldex::HumanPrimaryCellAtlasData()
ref
## class: SummarizedExperiment
## dim: 19363 713
## metadata(0):
## assays(1): logcounts
## rownames(19363): A1BG A1BG-AS1 ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(713): GSM112490 GSM112491 ... GSM92233 GSM92234
## colData names(3): label.main label.fine label.ont
#ref1 = celldex::BlueprintEncodeData()
#ref1
my.ATAC = as.SingleCellExperiment(ATAC_object)
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
## Found more than one class "package_version" in cache; using the first, from namespace 'SeuratObject'
## Also defined by 'alabaster.base'
pred = SingleR(my.ATAC, ref=ref, labels=ref$label.main)
table(pred$labels)
##
## B_cell GMP HSC_-G-CSF Monocyte
## 257 3 1 991
## NK_cell Pro-B_cell_CD34+ T_cells
## 128 2 1231
##Annotate
#Label Annotation
ATAC_object$SingleR.labels <- pred$labels[match(rownames(ATAC_object@meta.data),
rownames(pred))]
DimPlot(ATAC_object, group.by = "SingleR.labels", reduction = "tsne", label = TRUE)
DimPlot(ATAC_object, group.by = "SingleR.labels", reduction = "umap", label = TRUE)
# find all markers of cluster 1
cluster.markers <- FindAllMarkers(ATAC_object)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
head(cluster.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## PLXDC2 0 6.039091 0.982 0.039 0 0 PLXDC2
## SLC8A1 0 6.199935 0.955 0.032 0 0 SLC8A1
## LRMDA 0 5.771247 0.938 0.031 0 0 LRMDA
## FCN1 0 5.043945 0.966 0.082 0 0 FCN1
## TBXAS1 0 4.479807 0.921 0.070 0 0 TBXAS1
cluster.markers %>% group_by(cluster) %>% dplyr::filter(avg_log2FC > 1)
## # A tibble: 10,821 × 7
## # Groups: cluster [8]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 6.04 0.982 0.039 0 0 PLXDC2
## 2 0 6.20 0.955 0.032 0 0 SLC8A1
## 3 0 5.77 0.938 0.031 0 0 LRMDA
## 4 0 5.04 0.966 0.082 0 0 FCN1
## 5 0 4.48 0.921 0.07 0 0 TBXAS1
## 6 0 6.06 0.873 0.028 0 0 IRAK3
## 7 0 6.00 0.871 0.026 0 0 DMXL2
## 8 0 5.17 0.92 0.091 0 0 NAMPT
## 9 0 5.09 0.875 0.049 0 0 RBM47
## 10 0 4.96 0.873 0.051 0 0 MCTP1
## # ℹ 10,811 more rows
# Markers for b cells
VlnPlot(ATAC_object, features = c("IGHA1","CRIP2", "ITGB1", "FCRL5", "FGR" ))
VlnPlot(ATAC_object, features = c("CD79A", "CD79B", "IL4R","IGKV3-11", "IGHV5-51",
"IGLV3-19"))
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of IGHV5-51.
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of IGLV3-19.
# Markers for raw counts
VlnPlot(ATAC_object, features = c("MTRNR2L12", "RPS26"))
VlnPlot(ATAC_object, features = c("TCL1A", "SNX9"))
DotPlot(ATAC_object, features = topMarkers$gene[1:10]) + RotatedAxis()
p <- DotPlot(ATAC_object, features = topMarkers$gene, group.by = "SingleR.labels") + RotatedAxis()
p + coord_flip()
write.csv(ATAC_object@meta.data, "~/ATAC/3K_ATAC_OBJECT.csv")